<?php

if (!defined('IN_SEXMALL'))
{
	die('Hacking attempt');
}

class Chinese
{
	/**
	 * 存放 GB <-> UNICODE 对照表的内容
	 * @变量类型
	 * @访问      内部
	 */
	var $unicode_table = array();

	/**
	 * 访问中文繁简互换表的文件指针
	 *
	 * @变量类型  对象
	 * @访问      内部
	 */
	var $ctf;

	/**
	 * 等待转换的字符串
	 * @变量类型
	 * @访问      内部
	 */
	var $SourceText = '';

	/**
	 * Chinese 的运行配置
	 *
	 * @变量类型  数组
	 * @访问      公开
	 */
	var $config = array(
        'codetable_dir'    => '',                // 存放各种语言互换表的目录
        'source_lang'      => '',                // 字符的原编码
        'target_lang'      => '',                // 转换后的编码
        'GBtoBIG5_table'   => 'gb-big5.table',   // 简体中文转换为繁体中文的对照表
        'BIG5toGB_table'   => 'big5-gb.table',   // 繁体中文转换为简体中文的对照表
        'GBtoUTF8_table'   => 'gb_utf8.php',     // 简体中文转换为UTF-8的对照表
        'BIG5toUTF8_table' => 'big5_utf8.php'    // 繁体中文转换为UTF-8的对照表
	);

	var $iconv_enabled    = false; // 是否存在 ICONV 模块，默认为否
	var $mbstring_enabled = false; // 是否存在 MBSTRING 模块，默认为否


	/**
	 * Chinese 的悉构函数
	 *
	 * 详细说明
	 * @形参      字符串 $source_lang 为需要转换的字符串的原编码
	 *            字符串 $target_lang 为转换的目标编码
	 *            字符串 $SourceText 为等待转换的字符串
	 * @访问      公开
	 * @返回值    无
	 * @throws
	 */
	function Chinese($dir = './')
	{
		$this->config['codetable_dir'] = $dir . "includes/codetable/";

		if (function_exists('iconv'))
		{
			$this->iconv_enabled = true;
		}

		if (PHP_VERSION >= '5.0' && function_exists('mb_convert_encoding') && function_exists('mb_list_encodings'))
		{
			$encodings = mb_list_encodings();

			if (in_array('UTF-8', $encodings) == true && in_array('BIG-5', $encodings) == true && in_array('CP936', $encodings) == true) // CP936 就是 GBK 字符集的别名
			{
				$this->mbstring_enabled = true;
			}
		}
	}

	function Convert($source_lang, $target_lang, $source_string = '')
	{
		/* 如果字符串为空或者字符串不需要转换，直接返回 */
		if ($source_string == '' || preg_match("/[\x80-\xFF]+/", $source_string) == 0)
		{
			return $source_string;
		}

		if ($source_lang)
		{
			$this->config['source_lang'] = $this->_lang($source_lang);
		}

		if ($target_lang)
		{
			$this->config['target_lang'] = $this->_lang($target_lang);
		}

		/* 如果编码相同，直接返回 */
		if ($this->config['source_lang'] == $this->config['target_lang'])
		{
			return $source_string;
		}

		$this->SourceText = $source_string;

		if (($this->iconv_enabled || $this->mbstring_enabled) && !($this->config['source_lang'] == 'GBK' && $this->config['target_lang'] == 'BIG-5'))
		{
			if ($this->config['target_lang'] != 'UNICODE')
			{
				$string = $this->_convert_iconv_mbstring($this->SourceText, $this->config['target_lang'], $this->config['source_lang']);

				/* 如果正确转换 */
				if ($string)
				{
					return $string;
				}
			}
			else
			{
				$string = '';
				$text = $SourceText;
				while ($text)
				{
					if (ord(substr($text, 0, 1)) > 127)
					{
						if ($this->config['source_lang'] != 'UTF-8')
						{
							$char = $this->_convert_iconv_mbstring(substr($text, 0, 2), 'UTF-8', $this->config['source_lang']);
						}
						else
						{
							$char = substr($text, 0, 3);
						}
						/* 如果转换出错 */
						if ($char == '')
						{
							$string = '';

							break;
						}

						switch (strlen($char))
						{
							case 1:
								$uchar  = ord($char);
								break;

							case 2:
								$uchar  = (ord($char[0]) & 0x3f) << 6;
								$uchar += ord($char[1])  & 0x3f;
								break;

							case 3:
								$uchar  = (ord($char[0]) & 0x1f) << 12;
								$uchar += (ord($char[1]) & 0x3f) << 6;
								$uchar += ord($char[2])  & 0x3f;
								break;

							case 4:
								$uchar  = (ord($char[0]) & 0x0f) << 18;
								$uchar += (ord($char[1]) & 0x3f) << 12;
								$uchar += (ord($char[2]) & 0x3f) << 6;
								$uchar += ord($char[3])  & 0x3f;
								break;
						}
						$string .= '&#x' . dechex($uchar) . ';';

						if ($this->config['source_lang'] != 'UTF-8')
						{
							$text = substr($text, 2);
						}
						else
						{
							$text = substr($text, 3);
						}
					}
					else
					{
						$string .= substr($text, 0, 1);
						$text    = substr($text, 1);
					}
				}

				/* 如果正确转换 */
				if ($string)
				{
					return $string;
				}
			}
		}

		$this->OpenTable();
		// 判断是否为中文繁、简转换
		if (($this->config['source_lang'] == 'GBK' || $this->config['source_lang'] == 'BIG-5') && ($this->config['target_lang'] == 'GBK' || $this->config['target_lang'] == 'BIG-5'))
		{
			return $this->GBtoBIG5();
		}

		// 判断是否为简体、繁体中文与UTF8转换
		if (($this->config['source_lang'] == 'GBK' || $this->config['source_lang'] == 'BIG-5' || $this->config['source_lang'] == 'UTF-8') && ($this->config['target_lang'] == 'UTF-8' || $this->config['target_lang'] == 'GBK' || $this->config['target_lang'] == 'BIG-5'))
		{
			return $this->CHStoUTF8();
		}

		// 判断是否为简体、繁体中文与UNICODE转换
		if (($this->config['source_lang'] == 'GBK' || $this->config['source_lang'] == 'BIG-5') && $this->config['target_lang'] == 'UNICODE')
		{
			return $this->CHStoUNICODE();
		}
	}

	function _lang($lang)
	{
		$lang = strtoupper($lang);

		if (substr($lang, 0, 2) == 'GB')
		{
			return 'GBK';
		}
		else
		{
			switch(substr($lang, 0, 3))
			{
				case 'BIG':
					return 'BIG-5';

				case 'UTF':
					return 'UTF-8';

				case 'UNI':
					return 'UNICODE';

				default:
					return '';
			}
		}
	}

	function _convert_iconv_mbstring($string, $target_lang, $source_lang)
	{
		if ($this->iconv_enabled)
		{
			$return_string = @iconv($source_lang, $target_lang, $string);
			if ($return_string !== false)
			{
				return $return_string;
			}
		}

		if ($this->mbstring_enabled)
		{
			if ($source_lang == 'GBK')
			{
				$source_lang = 'CP936';
			}
			if ($target_lang == 'GBK')
			{
				$target_lang = 'CP936';
			}

			$return_string = @mb_convert_encoding($string, $target_lang, $source_lang);
			if ($return_string !== false)
			{
				return $return_string;
			}
			else
			{
				return false;
			}
		}
	}

	/**
	 * 将 16 进制转换为 2 进制字符
	 *
	 * 详细说明
	 * @形参      $hexdata 为16进制的编码
	 * @访问      内部
	 * @返回      字符串
	 * @throws
	 */
	function _hex2bin($hexdata)
	{
		$bindata = '';

		for ($i = 0, $count = strlen($hexdata); $i < $count; $i += 2)
		{
			$bindata .= chr(hexdec($hexdata{$i} . $hexdata{$i + 1}));
		}

		return $bindata;
	}

	/**
	 * 打开对照表
	 *
	 * 详细说明
	 * @形参
	 * @访问      内部
	 * @返回      无
	 * @throws
	 */
	function OpenTable()
	{
		static $gb_utf8_table      = NULL;
		static $gb_unicode_table   = NULL;
		static $utf8_gb_table      = NULL;

		static $big5_utf8_table    = NULL;
		static $big5_unicode_table = NULL;
		static $utf8_big5_table    = NULL;

		// 假如原编码为简体中文的话
		if ($this->config['source_lang'] == 'GBK')
		{
			// 假如转换目标编码为繁体中文的话
			if ($this->config['target_lang'] == 'BIG-5')
			{
				$this->ctf = @fopen($this->config['codetable_dir'] . $this->config['GBtoBIG5_table'], 'rb');
				if (is_null($this->ctf))
				{
					echo '打开打开转换表文件失败！';

					exit;
				}
			}

			// 假如转换目标编码为 UTF8 的话
			if ($this->config['target_lang'] == 'UTF-8')
			{
				if ($gb_utf8_table === NULL)
				{
					require_once($this->config['codetable_dir'] . $this->config['GBtoUTF8_table']);
				}
				$this->unicode_table = $gb_utf8_table;
			}

			// 假如转换目标编码为 UNICODE 的话
			if ($this->config['target_lang'] == 'UNICODE')
			{
				if ($gb_unicode_table === NULL)
				{
					if (isset($gb_utf8_table) === false)
					{
						require_once($this->config['codetable_dir'] . $this->config['GBtoUTF8_table']);
					}
					foreach ($gb_utf8_table AS $key => $value)
					{
						$gb_unicode_table[$key] = substr($value, 2);
					}
				}
				$this->unicode_table = $gb_unicode_table;
			}
		}

		// 假如原编码为繁体中文的话
		if ($this->config['source_lang'] == 'BIG-5')
		{
			// 假如转换目标编码为简体中文的话
			if ($this->config['target_lang'] == 'GBK')
			{
				$this->ctf = @fopen($this->config['codetable_dir'] . $this->config['BIG5toGB_table'], 'rb');
				if (is_null($this->ctf))
				{
					echo '打开打开转换表文件失败！';

					exit;
				}
			}
			// 假如转换目标编码为 UTF8 的话
			if ($this->config['target_lang'] == 'UTF-8')
			{
				if ($big5_utf8_table === NULL)
				{
					require_once($this->config['codetable_dir'] . $this->config['BIG5toUTF8_table']);
				}
				$this->unicode_table = $big5_utf8_table;
			}

			// 假如转换目标编码为 UNICODE 的话
			if ($this->config['target_lang'] == 'UNICODE')
			{
				if ($big5_unicode_table === NULL)
				{
					if (isset($big5_utf8_table) === false)
					{
						require_once($this->config['codetable_dir'] . $this->config['BIG5toUTF8_table']);
					}
					foreach ($big5_utf8_table AS $key => $value)
					{
						$big5_unicode_table[$key] = substr($value, 2);
					}
				}
				$this->unicode_table = $big5_unicode_table;
			}
		}

		// 假如原编码为 UTF8 的话
		if ($this->config['source_lang'] == 'UTF-8')
		{
			// 假如转换目标编码为 GBK 的话
			if ($this->config['target_lang'] == 'GBK')
			{
				if ($utf8_gb_table === NULL)
				{
					if (isset($gb_utf8_table) === false)
					{
						require_once($this->config['codetable_dir'] . $this->config['GBtoUTF8_table']);
					}
					foreach ($gb_utf8_table AS $key => $value)
					{
						$utf8_gb_table[hexdec($value)] = '0x' . dechex($key);
					}
				}
				$this->unicode_table = $utf8_gb_table;
			}

			// 假如转换目标编码为 BIG5 的话
			if ($this->config['target_lang'] == 'BIG-5')
			{
				if ($utf8_big5_table === NULL)
				{
					if (isset($big5_utf8_table) === false)
					{
						require_once($this->config['codetable_dir'] . $this->config['BIG5toUTF8_table']);
					}
					foreach ($big5_utf8_table AS $key => $value)
					{
						$utf8_big5_table[hexdec($value)] = '0x' . dechex($key);
					}
				}
				$this->unicode_table = $utf8_big5_table;
			}
		}
	}

	/**
	 * 将简体、繁体中文的 UNICODE 编码转换为 UTF8 字符
	 *
	 * 详细说明
	 * @形参      数字 $c 简体中文汉字的UNICODE编码的10进制
	 * @访问      内部
	 * @返回      字符串
	 * @throws
	 */
	function CHSUtoUTF8($c)
	{
		$str='';

		if ($c < 0x80)
		{
			$str .= $c;
		}
		elseif ($c < 0x800)
		{
			$str .= (0xC0 | $c >> 6);
			$str .= (0x80 | $c & 0x3F);
		}
		elseif ($c < 0x10000)
		{
			$str .= (0xE0 | $c >> 12);
			$str .= (0x80 | $c >> 6 & 0x3F);
			$str .= (0x80 | $c & 0x3F);
		}
		elseif ($c < 0x200000)
		{
			$str .= (0xF0 | $c >> 18);
			$str .= (0x80 | $c >> 12 & 0x3F);
			$str .= (0x80 | $c >> 6  & 0x3F);
			$str .= (0x80 | $c & 0x3F);
		}

		return $str;
	}

	/**
	 * 简体、繁体中文 <-> UTF8 互相转换的函数
	 *
	 * 详细说明
	 * @形参
	 * @访问      内部
	 * @返回      字符串
	 * @throws
	 */
	function CHStoUTF8()
	{
		if ($this->config['source_lang'] == 'BIG-5' || $this->config['source_lang'] == 'GBK')
		{
			$ret = '';

			while ($this->SourceText)
			{
				if (ord($this->SourceText{0}) > 127)
				{
					if ($this->config['source_lang'] == 'BIG-5')
					{
						$utf8 = $this->CHSUtoUTF8(hexdec(@$this->unicode_table[hexdec(bin2hex($this->SourceText{0} . $this->SourceText{1}))]));
					}
					if ($this->config['source_lang'] == 'GBK')
					{
						$utf8 = $this->CHSUtoUTF8(hexdec(@$this->unicode_table[hexdec(bin2hex($this->SourceText{0} . $this->SourceText{1})) - 0x8080]));
					}
					for ($i = 0, $count = strlen($utf8); $i < $count; $i += 3)
					{
						$ret .= chr(substr($utf8, $i, 3));
					}

					$this->SourceText = substr($this->SourceText, 2, strlen($this->SourceText));
				}
				else
				{
					$ret .= $this->SourceText{0};
					$this->SourceText = substr($this->SourceText, 1, strlen($this->SourceText));
				}
			}
			$this->unicode_table = array();
			$this->SourceText = '';

			return $ret;
		}

		if ($this->config['source_lang'] == 'UTF-8')
		{
			$i   = 0;
			$out = '';
			$len = strlen($this->SourceText);
			while ($i < $len)
			{
				$c = ord($this->SourceText{$i++});
				switch($c >> 4)
				{
					case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
						// 0xxxxxxx
						$out .= $this->SourceText{$i - 1};
						break;
					case 12: case 13:
						// 110x xxxx   10xx xxxx
						$char2 = ord($this->SourceText{$i++});
						$char3 = @$this->unicode_table[(($c & 0x1F) << 6) | ($char2 & 0x3F)];

						if ($this->config['target_lang'] == 'GBK')
						{
							$out .= $this->_hex2bin(dechex($char3 + 0x8080));
						}
						elseif ($this->config['target_lang'] == 'BIG-5')
						{
							$out .= $this->_hex2bin(dechex($char3 + 0x0000));
						}
						break;
					case 14:
						// 1110 xxxx  10xx xxxx  10xx xxxx
						$char2 = ord($this->SourceText{$i++});
						$char3 = ord($this->SourceText{$i++});
						$char4 = @$this->unicode_table[(($c & 0x0F) << 12) | (($char2 & 0x3F) << 6) | (($char3 & 0x3F) << 0)];

						if ($this->config['target_lang'] == 'GBK')
						{
							$out .= $this->_hex2bin(dechex($char4 + 0x8080));
						} elseif ($this->config['target_lang'] == 'BIG-5')
						{
							$out .= $this->_hex2bin(dechex($char4 + 0x0000));
						}

						break;
				}
			}

			// 返回结果
			return $out;
		}
	}

	/**
	 * 简体、繁体中文转换为 UNICODE编码
	 *
	 * 详细说明
	 * @形参
	 * @访问      内部
	 * @返回      字符串
	 * @throws
	 */
	function CHStoUNICODE()
	{
		$utf = '';

		while ($this->SourceText)
		{
			if (ord($this->SourceText{0}) > 127)
			{
				if ($this->config['source_lang'] == 'GBK')
				{
					$utf .= '&#x' . $this->unicode_table[hexdec(bin2hex($this->SourceText{0} . $this->SourceText{1})) - 0x8080] . ';';
				}
				elseif ($this->config['source_lang'] == 'BIG-5')
				{
					$utf .= '&#x' . $this->unicode_table[hexdec(bin2hex($this->SourceText{0} . $this->SourceText{1}))] . ';';
				}

				$this->SourceText = substr($this->SourceText, 2, strlen($this->SourceText));
			}
			else
			{
				$utf .= $this->SourceText{0};
				$this->SourceText = substr($this->SourceText, 1, strlen($this->SourceText));
			}
		}

		return $utf;
	}

	/**
	 * 简体中文 <-> 繁体中文 互相转换的函数
	 *
	 * 详细说明
	 * @访问      内部
	 * @返回值    经过编码的utf8字符
	 * @throws
	 */
	function GBtoBIG5()
	{
		// 获取等待转换的字符串的总长度
		$max = strlen($this->SourceText) - 1;

		for ($i = 0; $i < $max; $i++)
		{
			$h = ord($this->SourceText{$i});
			if ($h >= 160)
			{
				$l = ord($this->SourceText{$i + 1});

				if ($h == 161 && $l == 64)
				{
					$gb = '  ';
				}
				else
				{
					fseek($this->ctf, ($h - 160) * 510 + ($l - 1) * 2);
					$gb = fread($this->ctf, 2);
				}

				$this->SourceText{$i}     = $gb{0};
				$this->SourceText{$i + 1} = $gb{1};

				$i++;
			}
		}
		fclose($this->ctf);

		// 将转换后的结果赋予 $result;
		$result = $this->SourceText;

		// 清空 $thisSourceText
		$this->SourceText = '';

		// 返回转换结果
		return $result;
	}
}

?>
